#'
#'  Project title:     'Sovereign Risk and Government Change: Elections, Ideology and Experience'
#'  Authors:           Sarah M. Brooks; Raphael Cunha; Layna Mosley
#'  File description:  Prepares daily data on EMBI and CDS spreads for event study
#'  Output:            'EMBI Spreads and Elections.Rdata' & 'CDS Spreads and Elections.Rdata'
#'

# my_packages <- c("tidyverse", "reshape2", "countrycode",
#                  "rio", "rlang", "quantmod",
#                  "tidyquant", "zoo")
# install.packages(my_packages)

library(foreign)
library(reshape2)
library(countrycode)
library(tidyverse)
library(rlang)
library(quantmod)
library(tidyquant)
library(zoo)
library(rio)

# Set directories 

DATADIR <- "~/.../Data/Daily"


# Load and format spreads data --------------------------------------------

# EMBI Global spreads: read the CSV, then stack every non-date column into
# long format (columns: date, country, embispread)

embig <- file.path(DATADIR, "Datastream - EMBI Global Stripped Spread Daily (Clean).csv") %>%
  read.csv(stringsAsFactors = FALSE) %>%
  melt(id.vars = "date",
       variable.name = "country",
       value.name = "embispread")

# Bloomberg's CDS spreads: same treatment (columns: date, country, cdsspread)

cds <- file.path(DATADIR, "Bloomberg - CDS Daily (Clean).csv") %>%
  read.csv(stringsAsFactors = FALSE) %>%
  melt(id.vars = "date",
       variable.name = "country",
       value.name = "cdsspread")

# Format dates (parsed as month/day/year) and correct country names
# (dots in the melted column names are replaced by spaces)

embig <- embig %>%
  mutate(date = as.Date(date, "%m/%d/%Y"),
         month_year = format(date, "%m-%Y"),
         year = format(date, "%Y"),
         country = gsub("\\.", " ", as.character(country)))

cds <- cds %>%
  mutate(date = as.Date(date, "%m/%d/%Y"),
         month_year = format(date, "%m-%Y"),
         year = format(date, "%Y"),
         country = gsub("\\.", " ", as.character(country)))

# Add country codes: `iso3c` holds the "iso3c" code and `ccode` the "cown"
# code that countrycode derives from the country name

embig[c("iso3c", "ccode")] <- lapply(
  c("iso3c", "cown"),
  function(code) countrycode(embig$country, origin = "country.name",
                             destination = code, warn = TRUE))

cds[c("iso3c", "ccode")] <- lapply(
  c("iso3c", "cown"),
  function(code) countrycode(cds$country, origin = "country.name",
                             destination = code, warn = TRUE))

# Identify emerging markets (EMs + frontier markets)
# A CDS country is flagged (em = 1) when its ISO3 code appears in the
# emerging-markets list or among the countries with an EMBI series.

em <- read.csv(file = file.path(DATADIR, "Emerging Markets.csv"),
               stringsAsFactors = FALSE)

em$iso3c <- countrycode(em$emerging, origin = "country.name", destination = "iso3c", warn = TRUE)

# Drop missing codes before matching: `NA %in% c(..., NA)` is TRUE, so a CDS
# country whose name could not be converted would otherwise be flagged as
# emerging whenever any name in the reference lists also failed to convert.
em_codes <- union(em$iso3c, embig$iso3c)
em_codes <- em_codes[!is.na(em_codes)]

cds$em <- ifelse(cds$iso3c %in% em_codes, 1, 0)
rm(em, em_codes)

# Calculate changes in spread
# First difference within country, taken in row order; the first row of each
# country has no predecessor and therefore gets NA

embig <- embig %>%
  group_by(country) %>%
  mutate(d_embispread = embispread - dplyr::lag(embispread)) %>%
  ungroup() %>%
  as.data.frame()

cds <- cds %>%
  group_by(country) %>%
  mutate(d_cdsspread = cdsspread - dplyr::lag(cdsspread)) %>%
  ungroup() %>%
  as.data.frame()

# Exclude missing spreads data
# (done after differencing, so changes are computed on the unfiltered rows)

embig <- subset(embig, !is.na(embispread))
cds <- subset(cds, !is.na(cdsspread))


# Add risk factors for market model ---------------------------------------

# Construct (developed and emerging) index of spreads
# Index_i = median(spread_j)

# Leave-one-out peer index of spreads.
#
# For every country i and every date on which i has a row, `spread_index` is
# the median of `spread_var` across all *other* countries observed on that
# date. With `emerging = TRUE` the peers are further restricted to countries
# with the same value of `data$em` as country i.
#
# Arguments:
#   spread_var  name (string) of the spread column in `data`
#   emerging    if TRUE, build the index within the country's own `em` group
#   data        long data frame with columns `country`, `date`, `spread_var`
#               (and `em` when emerging = TRUE)
#
# Returns `data` with an added numeric column `spread_index`. The value is NA
# when no peer has an observation on that date (or when the date is missing).
#
# Peer medians are matched to the country's rows by date key rather than by
# position, so the result does not depend on row order and a date without
# any peer observation cannot shift the remaining values.

spreadIndex <- function(spread_var, emerging = FALSE, data){

  stopifnot(is.character(spread_var), length(spread_var) == 1,
            spread_var %in% names(data))
  if (emerging && !("em" %in% names(data))){
    stop("`data` needs an `em` column when emerging = TRUE", call. = FALSE)
  }

  spread <- data[[spread_var]]
  date_key <- as.character(data$date)
  data$spread_index <- rep(NA_real_, nrow(data))

  for (i in unique(data$country)){

    own <- which(data$country == i)

    # Peers: every other country, optionally limited to the same em group
    is_peer <- data$country != i
    if (emerging){
      is_peer <- is_peer & data$em == data$em[own[1]]
    }
    peers <- which(is_peer)

    # No peers at all: leave this country's index as NA
    if (length(peers) == 0) next

    # Median across peers for each date, named by date key
    peer_median <- tapply(spread[peers], date_key[peers], median)

    # Look up each of the country's dates; unmatched dates yield NA
    data$spread_index[own] <- as.vector(peer_median)[match(date_key[own], names(peer_median))]
  }
  return(data)
}

embig <- spreadIndex(spread_var = "embispread", data = embig)
cds <- spreadIndex(spread_var = "cdsspread", emerging = TRUE, data = cds)

# Daily change in the spread index
# (difference from the country's previous row; NA on each country's first row)

embig <- embig %>%
  group_by(country) %>%
  mutate(d_spread_index = spread_index - dplyr::lag(spread_index)) %>%
  ungroup() %>%
  as.data.frame()

cds <- cds %>%
  group_by(country) %>%
  mutate(d_spread_index = spread_index - dplyr::lag(spread_index)) %>%
  ungroup() %>%
  as.data.frame()

# US interest rates
# 10-year Treasury constant maturity rate
# Downloaded from FRED; the series object DGS10 is used right after the call.

getSymbols("DGS10", src = "FRED")
treasury10y <- as.data.frame(DGS10)
treasury10y$date <- as.Date(row.names(treasury10y))
colnames(treasury10y) <- c("treasury10y", "date")
# Carry the last observation forward over missing days. na.rm = FALSE keeps
# any leading NAs in place: with the default (na.rm = TRUE) they are dropped,
# the result is shorter than the data frame and the assignment fails.
treasury10y$treasury10y <- na.locf(treasury10y$treasury10y, na.rm = FALSE)
treasury10y$d_treasury10y <- c(NA, diff(treasury10y$treasury10y))

embig <- left_join(embig, treasury10y, by = "date")
cds <- left_join(cds, treasury10y, by = "date")

rm(DGS10, treasury10y)

# Oil price
# Crude Oil Prices: West Texas Intermediate (WTI) - Cushing, Oklahoma
# DCOILWTICO

getSymbols("DCOILWTICO", src = "FRED")
oil <- as.data.frame(DCOILWTICO)
oil$date <- as.Date(row.names(oil))
colnames(oil) <- c("oil", "date")
# Same forward fill as above, again keeping the vector length unchanged
oil$oil <- na.locf(oil$oil, na.rm = FALSE)
# Oil enters as a log change, unlike the level differences used above
oil$d_oil <- c(NA, diff(log(oil$oil)))

embig <- left_join(embig, oil, by = "date")
cds <- left_join(cds, oil, by = "date")

rm(DCOILWTICO, oil)

# VIX
# Level (closing value) and its first difference, keyed by date

vix <- tq_get("^VIX",
              get = "stock.prices",
              from = "1990-01-01") %>%
  as.data.frame() %>%
  mutate(vix = close,
         d_vix = c(NA, diff(close))) %>%
  select(date, vix, d_vix)

embig <- left_join(embig, vix, by = "date")
cds <- left_join(cds, vix, by = "date")

rm(vix)


# Add election dates ------------------------------------------------------

# Expand spreads dataset to include weekend dates (for elections that occur on weekends)

# Pads every country's series to a complete daily calendar.
#
# For each country, all calendar days between its first and last observed
# date are added as rows. `exchange_meets` is 1 on rows that were present in
# `data` and 0 on the added rows; all other variables are NA on added rows,
# except the identifiers rebuilt at the end (country, iso3c, ccode,
# month_year and, when `data` has it, year).
#
# Arguments:
#   data  long data frame with at least `country` and `date` (Date) columns
#
# Returns the padded data frame, sorted by date within country.

expandDate <- function(data){

  # Padded daily panel for a single country
  padCountry <- function(i){

    df <- data[which(data$country == i), , drop = FALSE]
    df$exchange_meets <- 1

    calendar <- data.frame(date = seq.Date(min(df$date), max(df$date), by = 1))

    df <- full_join(df, calendar, by = "date")
    df$exchange_meets[is.na(df$exchange_meets)] <- 0
    df$country <- i
    df[order(df$date),]
  }

  # Append all country datasets
  df <- do.call("rbind", lapply(unique(data$country), padCountry))

  # Add back country and time codes
  df$iso3c <- countrycode(df$country, origin = "country.name", destination = "iso3c", warn = TRUE)
  df$ccode <- countrycode(df$country, origin = "country.name", destination = "cown", warn = TRUE)
  df$month_year <- format(df$date, "%m-%Y")

  # `year` was left NA on the added rows while month_year was restored;
  # rebuild it too, but only when the input carries that column
  if ("year" %in% names(df)){
    df$year <- format(df$date, "%Y")
  }

  return(df)
}

embig <- expandDate(embig)
cds <- expandDate(cds)


# Add elections data ------------------------------------------------------

# `vdem_elec` is not created anywhere else in this script, so it is
# presumably provided by this .RData file
load(file = file.path(DATADIR, "V-Dem-CD-v8 (Elections Only).RData"))

vdem_elec[["date"]] <- as.Date(vdem_elec[["historical_date"]], "%Y-%m-%d")
vdem_elec[["year"]] <- as.numeric(vdem_elec[["year"]])
vdem_elec[["iso3c"]] <- countrycode(vdem_elec[["country_text_id"]], origin = "iso3c", destination = "iso3c")
elections <- vdem_elec

# Add type of system from DPI

dpi <- import(file = file.path(DATADIR, "DPI2015.dta"), format = "dta")

dpi[["iso3c"]] <- countrycode(dpi[["ifs"]], origin = "wb", destination = "iso3c", warn = TRUE)

elections <- elections %>%
  left_join(dpi[,c("iso3c", "year", "system")], by = c("iso3c", "year"))

# -999 is recoded to missing; Serbia is set to 2 for all its rows
elections$system <- with(elections, replace(system, which(system == -999), NA))
elections$system <- with(elections, replace(system, which(iso3c == "SRB"), 2))

# Code presidential and parliamentary systems
# (system 0 -> presidential; system 1 or 2 -> parliamentary; NA stays NA)

elections$presidential <- with(elections, ifelse(system == 0, 1, 0))
elections$parliamentary <- with(elections, ifelse(system == 1 | system == 2, 1, 0))

# Code main election date variable
# Parliamentary systems use v2eltype_0, presidential systems use v2eltype_6

elections$election1r <- with(elections,
                             ifelse(parliamentary == 1, v2eltype_0,
                                    ifelse(presidential == 1, v2eltype_6, 0)))

# Date of the election on rows flagged election1r == 1, NA elsewhere
elections$election1rdate <- with(elections,
                                 replace(as.Date(rep(NA, length(date))),
                                         which(election1r == 1),
                                         date[which(election1r == 1)]))

# Using 2nd round when available
# The v2eltype_1 / v2eltype_7 flag replaces the first-round flag on rows where
# it equals 1; rows matching none of the four cases get NA

elections$election2r <- with(elections,
                             ifelse(parliamentary == 1 & v2eltype_1 == 0, v2eltype_0,
                                    ifelse(parliamentary == 1 & v2eltype_1 == 1, v2eltype_1,
                                           ifelse(presidential == 1 & v2eltype_7 == 0, v2eltype_6,
                                                  ifelse(presidential == 1 & v2eltype_7 == 1, v2eltype_7, NA)))))

# Date of the election on rows flagged election2r == 1, NA elsewhere
elections$election2rdate <- with(elections,
                                 replace(as.Date(rep(NA, length(date))),
                                         which(election2r == 1),
                                         date[which(election2r == 1)]))

# Merge into spreads dataset

# Election rows without a usable key are dropped first: dplyr joins treat NA
# keys as equal to each other, so an election whose country code could not be
# converted to ISO3 would otherwise be attached to (and could duplicate) any
# spread row that also has a missing iso3c on that date.
election_flags <- elections[!is.na(elections$iso3c) & !is.na(elections$date),
                            c("date", "iso3c", "election1r", "election2r")]

# 1st round elections only (election1r) and 2nd round when available
# (election2r); rows with no election on that date get 0

embig <- left_join(embig, election_flags, by = c("date", "iso3c"))
embig$election1r[is.na(embig$election1r)] <- 0
embig$election2r[is.na(embig$election2r)] <- 0

cds <- left_join(cds, election_flags, by = c("date", "iso3c"))
cds$election1r[is.na(cds$election1r)] <- 0
cds$election2r[is.na(cds$election2r)] <- 0

rm(election_flags)

# Adjust dates of elections that fall on weekend
# Move to next weekday

# Flags the trading day to which each election is assigned.
#
# A row is flagged (1) when it is a trading day (exchange_meets == 1) and
# either an election took place on that day, or an election took place k days
# earlier (k = 1, ..., max_gap) and all k days in between, including the
# election day, were non-trading days. Every other row gets 0.
#
# Arguments:
#   election        0/1 election indicator, one value per calendar day
#   exchange_meets  0/1 indicator of days with spread data
#   max_gap         longest run of non-trading days an election is carried over
#
# Both vectors must belong to one country and be sorted by date. Days before
# the start of the series count as "no election", so the first rows of a
# series evaluate to 0/1 rather than NA and an election held on the first
# observed day is kept.

adjustElectionDate <- function(election, exchange_meets, max_gap = 4){

  # Value of `x` k rows earlier; rows before the start of the series get `fill`
  shiftBack <- function(x, k, fill){
    n <- length(x)
    c(rep(fill, min(k, n)), x[seq_len(max(n - k, 0))])
  }

  trading <- exchange_meets == 1
  adjusted <- trading & election == 1

  # TRUE while every one of the previous k days was a non-trading day
  all_closed <- rep(TRUE, length(election))

  for (k in seq_len(max_gap)){
    all_closed <- all_closed & shiftBack(exchange_meets, k, fill = 1) == 0
    adjusted <- adjusted | (trading & all_closed & shiftBack(election, k, fill = 0) == 1)
  }

  as.numeric(adjusted)
}

# EMBI

embig <- embig %>%
  group_by(iso3c) %>%
  mutate(election1r_adj = adjustElectionDate(election1r, exchange_meets),
         election2r_adj = adjustElectionDate(election2r, exchange_meets)) %>%
  as.data.frame()

# CDS

cds <- cds %>%
  group_by(iso3c) %>%
  mutate(election1r_adj = adjustElectionDate(election1r, exchange_meets),
         election2r_adj = adjustElectionDate(election2r, exchange_meets)) %>%
  as.data.frame()

# Code adjusted election dummy (1 where the adjusted flag equals 1, else 0)
# and create election identifiers = countryname + month/year of election
# (NA on rows that are not election days), then remove weekends/non-trading
# days

embig <- embig %>%
  mutate(election1r_dummy = as.numeric(election1r_adj %in% 1),
         election2r_dummy = as.numeric(election2r_adj %in% 1),
         election1r_id = ifelse(election1r_dummy == 1,
                                paste(country, format(date, "%m/%Y"), sep = " "),
                                NA),
         election2r_id = ifelse(election2r_dummy == 1,
                                paste(country, format(date, "%m/%Y"), sep = " "),
                                NA)) %>%
  subset(exchange_meets == 1)

cds <- cds %>%
  mutate(election1r_dummy = as.numeric(election1r_adj %in% 1),
         election2r_dummy = as.numeric(election2r_adj %in% 1),
         election1r_id = ifelse(election1r_dummy == 1,
                                paste(country, format(date, "%m/%Y"), sep = " "),
                                NA),
         election2r_id = ifelse(election2r_dummy == 1,
                                paste(country, format(date, "%m/%Y"), sep = " "),
                                NA)) %>%
  subset(exchange_meets == 1)

# Save dataset

save(embig, file = file.path(DATADIR, "EMBI Spreads and Elections.Rdata"))
save(cds, file = file.path(DATADIR, "CDS Spreads and Elections.Rdata"))

